Root_Atlas.rds (or get it by running through notebook 2, 3, 4, 5 & 6)
Protoplasting_DEgene_FC2_list.txt
cell_cycle_genes.csv
color_scheme_at.RData
Validated_Markers.csv
spatial_anno.txt (get it by running through notebook 3)
semitones.curated.marker.anno.nsds5.csv (get it by running through notebook 4)
semitones.curated.marker.csv (get it by running through notebook 4)
ici_raw_20200414.csv.bz2 (get it by running through notebook 5)
rm(list=ls())
# Set the working directory to where folders named after the samples are located.
# The folder contains spliced.mtx, unspliced.mtx, barcodes and gene id files, and json files produced by scKB that documents the sequencing stats.
setwd("/scratch/AG_Ohler/CheWei/scKB")
# Load libraries
suppressMessages(library(Seurat))
suppressMessages(library(scales))
suppressMessages(library(data.table))
suppressMessages(library(tidyverse))
suppressMessages(library(ggplot2))
suppressMessages(library(grid))
sessionInfo()
R version 4.0.3 (2020-10-10) Platform: x86_64-conda-linux-gnu (64-bit) Running under: CentOS Linux 7 (Core) Matrix products: default BLAS/LAPACK: /fast/home/c/chsu/anaconda3/envs/seu3/lib/libopenblasp-r0.3.12.so locale: [1] LC_CTYPE=en_US.utf-8 LC_NUMERIC=C [3] LC_TIME=en_US.utf-8 LC_COLLATE=en_US.utf-8 [5] LC_MONETARY=en_US.utf-8 LC_MESSAGES=en_US.utf-8 [7] LC_PAPER=en_US.utf-8 LC_NAME=C [9] LC_ADDRESS=C LC_TELEPHONE=C [11] LC_MEASUREMENT=en_US.utf-8 LC_IDENTIFICATION=C attached base packages: [1] grid stats graphics grDevices utils datasets methods [8] base other attached packages: [1] forcats_0.5.0 stringr_1.4.0 dplyr_1.0.2 purrr_0.3.4 [5] readr_1.4.0 tidyr_1.1.2 tibble_3.0.4 ggplot2_3.3.2 [9] tidyverse_1.3.0 data.table_1.13.2 scales_1.1.1 Seurat_3.1.5 loaded via a namespace (and not attached): [1] nlme_3.1-150 tsne_0.1-3 fs_1.5.0 [4] matrixStats_0.57.0 lubridate_1.7.9 RcppAnnoy_0.0.16 [7] RColorBrewer_1.1-2 httr_1.4.2 repr_1.1.0 [10] sctransform_0.3.1 tools_4.0.3 backports_1.2.0 [13] R6_2.5.0 irlba_2.3.3 KernSmooth_2.23-18 [16] uwot_0.1.8 DBI_1.1.0 lazyeval_0.2.2 [19] colorspace_2.0-0 withr_2.3.0 tidyselect_1.1.0 [22] gridExtra_2.3 compiler_4.0.3 cli_2.1.0 [25] rvest_0.3.6 xml2_1.3.2 plotly_4.9.2.1 [28] lmtest_0.9-38 ggridges_0.5.2 pbapply_1.4-3 [31] pbdZMQ_0.3-3.1 digest_0.6.27 base64enc_0.1-3 [34] pkgconfig_2.0.3 htmltools_0.5.0 parallelly_1.21.0 [37] dbplyr_2.0.0 readxl_1.3.1 htmlwidgets_1.5.2 [40] rlang_0.4.8 rstudioapi_0.13 generics_0.1.0 [43] zoo_1.8-8 jsonlite_1.7.1 ica_1.0-2 [46] magrittr_1.5 patchwork_1.1.0 Matrix_1.2-18 [49] fansi_0.4.1 Rcpp_1.0.5 IRkernel_1.1.1.9000 [52] munsell_0.5.0 ape_5.4-1 reticulate_1.18 [55] lifecycle_0.2.0 stringi_1.5.3 MASS_7.3-53 [58] Rtsne_0.15 plyr_1.8.6 parallel_4.0.3 [61] listenv_0.8.0 ggrepel_0.8.2 crayon_1.3.4 [64] lattice_0.20-41 haven_2.3.1 IRdisplay_0.7.0 [67] cowplot_1.1.0 splines_4.0.3 hms_0.5.3 [70] ps_1.4.0 pillar_1.4.6 igraph_1.2.6 [73] uuid_0.1-4 future.apply_1.6.0 reshape2_1.4.4 [76] codetools_0.2-18 leiden_0.3.5 reprex_0.3.0 [79] glue_1.4.2 evaluate_0.14 modelr_0.1.8 [82] png_0.1-7 vctrs_0.3.4 cellranger_1.1.0 [85] gtable_0.3.0 RANN_2.6.1 assertthat_0.2.1 [88] future_1.20.1 rsvd_1.0.3 broom_0.7.2 [91] survival_3.2-7 viridisLite_0.3.0 cluster_2.1.0 [94] globals_0.13.1 fitdistrplus_1.1-1 ellipsis_0.3.1 [97] ROCR_1.0-11
# Load unwanted genes, which are genes being induced during protoplasting in our case
pp.genes <- as.character(read.table("./supp_data/Protoplasting_DEgene_FC2_list.txt", header=F)$V1)
cc.genes <- as.character(read.table("./supp_data/cell_cycle_genes.csv", header=TRUE, sep=",")$Locus)
# Load the color scheme for time and cell type annotation
load("./supp_data/color_scheme_at.RData")
# Load known markers, including both wet-lab validated and in-silico-derived
known.good.markers <- read.csv("./supp_data/Validated_Markers.csv", header=T)
# Load novoSpaRc output
spa <- read.table("./supp_data/spatial_anno.txt", sep="\t", header=TRUE)
# Load SEMITONES output
escore <- read.csv("./supp_data/semitones.curated.marker.anno.nsds5.csv", header=T)
escore <- escore +1
es <- fread("./supp_data/semitones.curated.marker.csv", header = T)
# Load ICI output
ici_raw <- fread("./supp_data/ici_raw_20200414.csv.bz2")
# Read in the atlas
rc.integrated <- readRDS('./supp_data/Root_Atlas.rds')
zscore <- function(x){(x-mean(x))/sd(x)}
known.good.markers$Celltype <- gsub("Columella (1-4th Outer Layer) + LRC (3 Vertical Layers from Columella)", "Columella", known.good.markers$Celltype, ignore.case = FALSE, perl = FALSE,fixed = T, useBytes = FALSE);
known.good.markers$Celltype <- gsub("Columella (1st Outer Layer) + LRC (1st +2nd Outer Layer)", "Root Cap", known.good.markers$Celltype, ignore.case = FALSE, perl = FALSE,fixed = T, useBytes = FALSE);
known.good.markers$Celltype <- gsub("Columella (1st Outer Layer) + LRC (1st Outer Layer)", "Root Cap", known.good.markers$Celltype, ignore.case = FALSE, perl = FALSE,fixed = T, useBytes = FALSE);
known.good.markers$Celltype <- gsub("Columella (1-3th Outer Layer)", "Columella", known.good.markers$Celltype, ignore.case = FALSE, perl = FALSE,fixed = T, useBytes = FALSE);
known.good.markers$Celltype <- gsub("Columella (1-5th Outer Layer)", "Columella", known.good.markers$Celltype, ignore.case = FALSE, perl = FALSE,fixed = T, useBytes = FALSE);
known.good.markers$Celltype <- gsub("Columella + LRC", "Root Cap", known.good.markers$Celltype, ignore.case = FALSE, perl = FALSE,fixed = T, useBytes = FALSE);
known.good.markers$Celltype <- gsub("Cortex + Endodermis", "Ground Tissue", known.good.markers$Celltype, ignore.case = FALSE, perl = FALSE,fixed = T, useBytes = FALSE);
known.good.markers$Celltype <- gsub("Procambium + Phloem", "Procambium", known.good.markers$Celltype, ignore.case = FALSE, perl = FALSE,fixed = T, useBytes = FALSE);
known.good.markers$Celltype <- gsub("Procambium + Xylem", "Procambium", known.good.markers$Celltype, ignore.case = FALSE, perl = FALSE,fixed = T, useBytes = FALSE);
known.good.markers$Celltype <- gsub("Quiescent Center + Columella", "Columella", known.good.markers$Celltype, ignore.case = FALSE, perl = FALSE,fixed = T, useBytes = FALSE);
known.good.markers$Celltype <- gsub("LRC + Atrichoblast", "Atrichoblast", known.good.markers$Celltype, ignore.case = FALSE, perl = FALSE,fixed = T, useBytes = FALSE);
known.good.markers$Celltype <- gsub("Phloem", "Metaphloem & Companion Cell", known.good.markers$Celltype, ignore.case = FALSE, perl = FALSE,fixed = T, useBytes = FALSE);
known.good.markers$Celltype <- gsub("Metaphloem & Companion Cell;Metaphloem & Companion Cell Pole Pericycle", "Phloem Pole Pericycle", known.good.markers$Celltype, ignore.case = FALSE, perl = FALSE,fixed = T, useBytes = FALSE);
known.good.markers$Celltype <- gsub("PSE", "Phloem Sieve Element", known.good.markers$Celltype, ignore.case = FALSE, perl = FALSE,fixed = T, useBytes = FALSE);
known.good.markers$Celltype <- gsub("Proximal Columella", "Columella", known.good.markers$Celltype, ignore.case = FALSE, perl = FALSE,fixed = T, useBytes = FALSE);
known.good.markers$Celltype <- gsub("Distal Columella", "Columella", known.good.markers$Celltype, ignore.case = FALSE, perl = FALSE,fixed = T, useBytes = FALSE);
known.good.markers$Celltype <- gsub("Pericycle Initials", "Xylem Pole Pericycle", known.good.markers$Celltype, ignore.case = FALSE, perl = FALSE,fixed = T, useBytes = FALSE);
msc <- c()
for (i in as.character(unique(known.good.markers$Celltype))){
if (length(known.good.markers[which(known.good.markers$Celltype== i),]$Locus)>1){
msc <- cbind(msc, as.numeric(apply(apply(rc.integrated@assays$SCT@data[known.good.markers[which(known.good.markers$Celltype== i),]$Locus,], 1, zscore), 1, mean)))
} else {
msc <- cbind(msc, as.numeric(zscore(rc.integrated@assays$SCT@data[known.good.markers[which(known.good.markers$Celltype== i),]$Locus,])))
}
}
colnames(msc) <- as.character(unique(known.good.markers$Celltype))
rownames(msc) <- colnames(rc.integrated)
# Find clusters, here we choose Leiden clustering algorithm with resolution 0.5. Parameter "algorithm": 1 = original Louvain algorithm; 2 = Louvain algorithm with multilevel refinement; 3 = SLM algorithm; 4 = Leiden algorithm
suppressMessages(suppressWarnings(
rc.integrated <- FindClusters(rc.integrated, resolution = 500, algorithm = 3)
))
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck Number of nodes: 110427 Number of edges: 3393008 Running smart local moving algorithm... Maximum modularity in 10 random starts: 0.4077 Number of communities: 3034 Elapsed time: 103 seconds
options(repr.plot.width=10, repr.plot.height=8)
DimPlot(rc.integrated, reduction = "umap")+NoLegend()
anno <- rc.integrated$seurat_clusters
for (i in unique(rc.integrated$seurat_clusters)){
if (max(apply(msc[which(rc.integrated$seurat_clusters==i),],2,mean))>0){
ct <- names(which.max(apply(msc[which(rc.integrated$seurat_clusters==i),],2,mean)))
} else {
ct <- "NA"
}
anno <- gsub(paste0("^",i,"$"), ct, anno, ignore.case = FALSE, perl = FALSE,fixed = FALSE, useBytes = FALSE)
}
rc.integrated$score.anno <- anno
# Plot marker annotation
order <- c("Quiescent Center", "Ground Tissue","Columella", "Root Cap", "Atrichoblast", "Trichoblast", "Cortex", "Endodermis", "Metaphloem & Companion Cell", "Xylem", "Procambium","Xylem Pole Pericycle","Phloem Pole Pericycle","Phloem Sieve Element", "Protoxylem", "Metaxylem", "NA")
palette <- c("#9400d3", "#DCD0FF","#5ab953", "#bfef45", "#008080", "#21B6A8", "#82b6ff", "#0000FF","#e6194b", "#9a6324", "#ffe119", "#ff9900", "#ffd4e3", "#dd77ec", "#9a6324", "#ddaa6f", "#EEEEEE")
rc.integrated$score.anno <- factor(rc.integrated$score.anno , levels = order[sort(match(unique(rc.integrated$score.anno),order))])
color <- palette[sort(match(unique(rc.integrated$score.anno),order))]
options(repr.plot.width=12, repr.plot.height=10)
DimPlot(rc.integrated, reduction = "umap", group.by = "score.anno", cols = color)+ggtitle("Z-Score Annotation")
# A cell is annotated as the label which its marker has the highest escore
call.index <- apply(es, 2, function(x){colnames(escore)[which(x==max(x))]});
pos.index <- apply(es, 2, function(x){if(max(x)<0){"negative"}else{"positive"}})
escore.anno <- c()
for (i in colnames(escore)){
ea <- rep('NA',ncol(rc.integrated))
ea[escore[,i][!is.na(escore[,i])]] = as.character(known.good.markers$Celltype[match(i,known.good.markers$Locus)])
escore.anno <- cbind(escore.anno,ea)
}
spec.index <- apply(escore.anno,1,function(x){length(table(x[x!="NA"]))})
escore.anno <- rep('NA',ncol(rc.integrated))
for (i in 1:length(call.index)){
if(pos.index[i] == "positive"){
escore.anno[i] = as.character(known.good.markers$Celltype[match(call.index[i],known.good.markers$Locus)])
}
}
escore.anno[which(spec.index == 0)] <- 'NA'
# Store marker annotation results in atlas
rc.integrated$escore.anno <- escore.anno
es <- as.matrix(es)
rownames(es) <- colnames(escore)
colnames(es) <- colnames(rc.integrated)
rc.integrated$escore.crude.anno <- escore.anno
# Plot marker annotation
order <- c("Quiescent Center", "Ground Tissue","Columella", "Root Cap", "Atrichoblast", "Trichoblast", "Cortex", "Endodermis", "Metaphloem & Companion Cell", "Xylem", "Procambium","Xylem Pole Pericycle","Phloem Pole Pericycle","Phloem Sieve Element", "Protoxylem", "Metaxylem", "NA")
palette <- c("#9400d3", "#DCD0FF","#5ab953", "#bfef45", "#008080", "#21B6A8", "#82b6ff", "#0000FF","#e6194b", "#9a6324", "#ffe119", "#ff9900", "#ffd4e3", "#dd77ec", "#9a6324", "#ddaa6f", "#EEEEEE")
rc.integrated$escore.crude.anno <- factor(rc.integrated$escore.crude.anno , levels = order[sort(match(unique(rc.integrated$escore.crude.anno),order))])
color <- palette[sort(match(unique(rc.integrated$escore.crude.anno),order))]
options(repr.plot.width=12, repr.plot.height=10)
DimPlot(rc.integrated, reduction = "umap", group.by = "escore.crude.anno", cols = color)+ggtitle("SEMITONES nsds 5 Crude Marker Annotation")
rc.integrated@meta.data <- cbind(rc.integrated@meta.data, spa)
order <- c("Quiescent Center", "Stem Cell Niche", "Columella", "Lateral Root Cap", "Atrichoblast", "Trichoblast", "Cortex", "Endodermis", "Pericycle", "Phloem", "Xylem", "Procambium", "Unknown")
palette <- c("#9400d3", "#DCD0FF", "#5AB953", "#BFEF45", "#008080", "#21B6A8", "#82B6FF", "#0000FF","#FF9900","#E6194B", "#9A6324", "#FFE119","#FFFFFF")
rc.integrated$novo.anno <- factor(rc.integrated$novo.anno, levels = order[sort(match(unique(rc.integrated$novo.anno),order))])
color <- palette[sort(match(unique(rc.integrated$novo.anno),order))]
options(repr.plot.width=10, repr.plot.height=8)
DimPlot(rc.integrated, reduction = "umap", group.by = "novo.anno", cols=color, pt.size=0.5)
# Select best ICI annotation based on adjusted-pvale and normalized ICI score
ici_filtered <- ici_raw %>% group_by(Cell) %>% filter(p_adj == min(p_adj)) %>% filter(ici_score_norm == max(ici_score_norm)) %>% filter(p_val == max(p_val)) %>% filter(ici_score == max(ici_score))
# Select samples
ici_atlas <- ici_filtered %>% filter(Dataset == 'sc_12'|Dataset == 'sc_11'|Dataset == 'sc_30'|Dataset == 'sc_31'|Dataset == 'sc_37'|Dataset == 'sc_40'|Dataset == 'sc_51'|Dataset == 'sc_9_at'|Dataset == 'sc_10_at'|Dataset == 'sc_1'|Dataset == 'tnw1'|Dataset == 'tnw2'|Dataset == 'col0'|Dataset == 'pp1'|Dataset == 'dc1'|Dataset == 'dc2')
# Store ICI annotation results in atlas
ord <- unique(rc.integrated$orig.ident)
idx <- c()
for (i in 1:length(ord)){
idx <- c(idx,gsub(paste0("_",i,"$"),"",gsub("^",paste0(ord[i],"_"),colnames(rc.integrated)[grep(paste0("_",i,"$"),colnames(rc.integrated))])))
}
rownames(ici_atlas) <- ici_atlas$Cell
rc.integrated$ici_celltype <- ici_atlas[idx,]$Cell_Type;
rc.integrated$ici_celltype_score <- ici_atlas[idx,]$ici_score;
rc.integrated$ici_celltype_p_val <- ici_atlas[idx,]$p_val;
rc.integrated$ici_celltype_score_norm <- ici_atlas[idx,]$ici_score_norm;
rc.integrated$ici_celltype_p_adj <- ici_atlas[idx,]$p_adj;
ici.order <- unique(rc.integrated$ici_celltype)
# Merge labels
rc.integrated$ici_celltype[which(rc.integrated$ici_celltype == "Columella_Stem")]="Columella";
rc.integrated$ici_celltype[which(rc.integrated$ici_celltype == "Ground_Cell_Stem")]="Unknown";
rc.integrated$ici_celltype[which(rc.integrated$ici_celltype == "Epidermis_LRC_Stem")]="Lateral_Root_Cap";
rc.integrated$ici_celltype[which(rc.integrated$ici_celltype == "Xylem_Stem")]="Meristematic_Xylem";
rc.integrated$ici_celltype[is.na(rc.integrated$ici_celltype)] = "Unknown"
Warning message: “Setting row names on a tibble is deprecated.”
# Plot ICI annotation
ici.order <- unique(rc.integrated$ici_celltype)
ici.palette <- c('#C7EA46','#FF9900','#B67721','#21B6A8','#0082C8','#FE7F9C','#FF00FF','#0000FF','#008081','#FF0800','#C0C0C0','#5AB953','#9400D3','#7EF9FF')
rc.integrated$ici_celltype <- factor(rc.integrated$ici_celltype, levels = ici.order[sort(match(unique(rc.integrated$ici_celltype),ici.order))])
color.ici <- ici.palette[sort(match(unique(rc.integrated$ici_celltype),ici.order))]
options(repr.plot.width=10, repr.plot.height=8)
DimPlot(rc.integrated, reduction = "umap", group.by = "ici_celltype", cols = color.ici)+ggtitle("ici annotation")
rc.integrated$ici_significance <- rep("> 0.01", nrow(rc.integrated@meta.data))
rc.integrated$ici_significance[which(rc.integrated$ici_celltype_p_adj <= 0.01)] = "<= 0.01"
DimPlot(rc.integrated, reduction = "umap", group.by = "ici_significance", order = c("<= 0.01","> 0.01"),cols = c("#cccccc", "#008081"))
# Plot correlation-based annotation
color <- celltypepalette[sort(match(unique(rc.integrated$celltype.ID.P),celltypeorder))]
DimPlot(rc.integrated, reduction = "umap", group.by = "celltype.ID.P", cols = color)+ggtitle("RNA-seq correlation annotation")
rc.integrated$celltype.ID.P_significance <- rep("> 0.01", nrow(rc.integrated@meta.data))
rc.integrated$celltype.ID.P_significance[which(rc.integrated$celltype.pvalue.P <= 0.01)] = "<= 0.01"
rc.integrated$celltype.cor.P_significance <- rep(">= 0.6", nrow(rc.integrated@meta.data))
rc.integrated$celltype.cor.P_significance[which(rc.integrated$celltype.cor.P < 0.6)] = "< 0.6"
DimPlot(rc.integrated, reduction = "umap", group.by = "celltype.ID.P_significance", order = c("<= 0.01","> 0.01"),cols = c("#cccccc", "#ff4040"))
FeaturePlot(rc.integrated, reduction = "umap", features = "celltype.cor.P")
DimPlot(rc.integrated, reduction = "umap", group.by = "celltype.cor.P_significance", order = c(">= 0.6","< 0.6"),cols = c("#cccccc", "#008081"))
rc.integrated$Rad.ID.P <- factor(rc.integrated$Rad.ID.P, levels = radorder[sort(match(unique(rc.integrated$Rad.ID.P),radorder))])
color <- radpalette[sort(match(unique(rc.integrated$Rad.ID.P),radorder))]
DimPlot(rc.integrated, reduction = "umap", group.by = "Rad.ID.P", cols = color)+ggtitle("Microarray correlation annotation")
rc.integrated$Rad.ID.P_significance <- rep("> 0.01", nrow(rc.integrated@meta.data))
rc.integrated$Rad.ID.P_significance[which(rc.integrated$Rad.pvalue.P <= 0.01)] = "<= 0.01"
rc.integrated$Rad.cor.P_significance <- rep(">= 0.6", nrow(rc.integrated@meta.data))
rc.integrated$Rad.cor.P_significance[which(rc.integrated$Rad.cor.P < 0.6)] = "< 0.6"
DimPlot(rc.integrated, reduction = "umap", group.by = "Rad.ID.P_significance", order = c("<= 0.01","> 0.01"),cols = c("#cccccc", "#ff4040"))
FeaturePlot(rc.integrated, reduction = "umap", features = "Rad.cor.P")
DimPlot(rc.integrated, reduction = "umap", group.by = "Rad.cor.P_significance", order = c(">= 0.6","< 0.6"),cols = c("#cccccc", "#008081"))
# Correlation-based annotation
hc.rna.anno <- as.character(rc.integrated$celltype.ID.P);
hc.ma.anno <- as.character(rc.integrated$Rad.ID.P);
# ICI annotation
hc.ici.anno <- as.character(rc.integrated$ici_celltype);
# Marker-based annotation
hc.km.anno <- as.character(rc.integrated$escore.crude.anno)
hc.ms.anno <- as.character(rc.integrated$score.anno)
# Spatial annotation
hc.sp.anno <- as.character(rc.integrated$novo.anno)
hc.sp.anno[which(rc.integrated$novo.anno == "Unknown")] = "NA";
# Root Cap Spatial annotation
hc.rcsp.anno <- as.character(rc.integrated$annotation_ncfqc)
hc.rcsp.anno[which(rc.integrated$annotation_ncfqc == "Unknown")] = "NA";
# Merge and unify labels of annotation
hc.km.anno[which(hc.km.anno == "Protoxylem")]="Xylem"
hc.km.anno[which(hc.km.anno == "Metaxylem")]="Xylem"
hc.ms.anno[which(hc.ms.anno == "Protoxylem")]="Xylem"
hc.ms.anno[which(hc.ms.anno == "Metaxylem")]="Xylem"
hc.ma.anno[which(hc.ma.anno == "Hair Cell")]="Trichoblast"
hc.ma.anno[which(hc.ma.anno == "LRC")]="Lateral Root Cap"
hc.ma.anno[which(hc.ma.anno == "Mat.Xylem")]="Xylem"
hc.ma.anno[which(hc.ma.anno == "Meri.Xylem")]="Xylem"
hc.ma.anno[which(hc.ma.anno == "Non-Hair Cell")]="Atrichoblast"
hc.ma.anno[which(hc.ma.anno == "Phloem [S32]")]="Phloem Sieve Element"
hc.ma.anno[which(hc.ma.anno == "Phloem [SUC2]")]="Metaphloem & Companion Cell"
hc.ma.anno[which(hc.ma.anno == "QC")]="Quiescent Center"
hc.rna.anno[which(hc.rna.anno == "columella")]="Columella"
hc.rna.anno[which(hc.rna.anno == "developing cortex")]="Cortex"
hc.rna.anno[which(hc.rna.anno == "developing xylem")]="Xylem"
hc.rna.anno[which(hc.rna.anno == "endodermis & QC cells")]="Endodermis"
hc.rna.anno[which(hc.rna.anno == "hair cells")]="Trichoblast"
hc.rna.anno[which(hc.rna.anno == "LRC & non-hair cells")]="Lateral Root Cap"
hc.rna.anno[which(hc.rna.anno == "matured cortex")]="Cortex"
hc.rna.anno[which(hc.rna.anno == "matured endodermis")]="Endodermis"
hc.rna.anno[which(hc.rna.anno == "matured xylem pole")]="Xylem"
hc.rna.anno[which(hc.rna.anno == "non-hair cells")]="Atrichoblast"
hc.rna.anno[which(hc.rna.anno == "phloem & companion cells")]="Phloem"
hc.rna.anno[which(hc.rna.anno == "phloem pole pericycle")]="Phloem Pole Pericycle"
hc.rna.anno[which(hc.rna.anno == "protophloem & metaphloem")]="Phloem Sieve Element"
hc.rna.anno[which(hc.rna.anno == "QC cells")]="Quiescent Center"
hc.ici.anno[which(hc.ici.anno == "Mature_Cortex")]="Cortex"
hc.ici.anno[which(hc.ici.anno == "Phloem_Pole_Pericycle")]="Phloem Pole Pericycle"
hc.ici.anno[which(hc.ici.anno == "Maturing_Xylem")]="Xylem"
hc.ici.anno[which(hc.ici.anno == "Developing_Cortex")]="Cortex"
hc.ici.anno[which(hc.ici.anno == "Phloem_Companion_Cells")]="Metaphloem & Companion Cell"
hc.ici.anno[which(hc.ici.anno == "Meristematic_Xylem")]="Xylem"
hc.ici.anno[which(hc.ici.anno == "Quiescent_Center")]="Quiescent Center"
hc.ici.anno[which(hc.ici.anno == "Lateral_Root_Cap")]="Lateral Root Cap"
hc.ici.anno[which(hc.ici.anno == "Xylem_Pole_Pericycle")]="Xylem Pole Pericycle"
hc.ici.anno[which(hc.ici.anno == "Phloem_Companion_Cells")]="Metaphloem & Companion Cell"
# Merge annotations and metadata
cb.anno <- data.frame(hc.rna.anno=hc.rna.anno,hc.ma.anno=hc.ma.anno,hc.ici.anno=hc.ici.anno,hc.km.anno=hc.km.anno,hc.sp.anno=hc.sp.anno, hc.rcsp.anno=hc.rcsp.anno,hc.ms.anno=hc.ms.anno)
num.unique <- apply(cb.anno,1,function(x){length(unique(x[which(x!="NA")]))})
num.not.NA <- apply(cb.anno,1,function(x){length(which(x!="NA"))})
ici.not.NA <- apply(cb.anno,1,function(x){length(which(x[3]!="NA"))})
km.not.NA <- apply(cb.anno,1,function(x){length(which(x[4]!="NA"))})
ms.not.NA <- apply(cb.anno,1,function(x){length(which(x[7]!="NA"))})
cb.anno$num.unique <- num.unique
cb.anno$num.not.NA <- num.not.NA
cb.anno$ici.not.NA <- ici.not.NA
cb.anno$km.not.NA <- km.not.NA
cb.anno$ms.not.NA <- ms.not.NA
cb.anno$Cell_ID <- colnames(rc.integrated)
cb.anno$order <- seq(1,ncol(rc.integrated))
# Combined annotation for root cap
cb.anno.ground <- cb.anno %>% filter(hc.ms.anno == "Ground Tissue")
cb.anno.rootcap <- cb.anno %>% filter(hc.km.anno == "Root Cap" | hc.km.anno == "Columella")
# For making consensus annotation, we only keep those with correlation coefficient > 0.6 and adjusted p-value/p-value < 0.01
# Correlation-based annotation
hc.rna.anno <- as.character(rc.integrated$celltype.ID.P);
hc.rna.anno[which(rc.integrated$celltype.pvalue.P > 0.01)] = "NA";
hc.rna.anno[which(rc.integrated$celltype.cor.P < 0.6)] = "NA";
hc.ma.anno <- as.character(rc.integrated$Rad.ID.P);
hc.ma.anno[which(rc.integrated$Rad.pvalue.P > 0.01)] = "NA";
hc.ma.anno[which(rc.integrated$Rad.cor.P < 0.6)] = "NA";
# ICI annotation
hc.ici.anno <- as.character(rc.integrated$ici_celltype);
hc.ici.anno[which(rc.integrated$ici_celltype_p_adj > 0.01)] = "NA";
# Marker-based annotation
hc.km.anno <- as.character(rc.integrated$escore.crude.anno)
hc.ms.anno <- as.character(rc.integrated$score.anno)
# Spatial annotation
hc.sp.anno <- as.character(rc.integrated$novo.anno)
hc.sp.anno[which(rc.integrated$novo.anno == "Unknown")] = "NA";
# Root Cap Spatial annotation
hc.rcsp.anno <- as.character(rc.integrated$annotation_ncfqc)
hc.rcsp.anno[which(rc.integrated$annotation_ncfqc == "Unknown")] = "NA";
# Merge and unify labels of annotation
hc.km.anno[which(hc.km.anno == "Protoxylem")]="Xylem"
hc.km.anno[which(hc.km.anno == "Metaxylem")]="Xylem"
hc.ms.anno[which(hc.ms.anno == "Protoxylem")]="Xylem"
hc.ms.anno[which(hc.ms.anno == "Metaxylem")]="Xylem"
hc.ma.anno[which(hc.ma.anno == "Hair Cell")]="Trichoblast"
hc.ma.anno[which(hc.ma.anno == "LRC")]="Lateral Root Cap"
hc.ma.anno[which(hc.ma.anno == "Mat.Xylem")]="Xylem"
hc.ma.anno[which(hc.ma.anno == "Meri.Xylem")]="Xylem"
hc.ma.anno[which(hc.ma.anno == "Non-Hair Cell")]="Atrichoblast"
hc.ma.anno[which(hc.ma.anno == "Phloem [S32]")]="Phloem Sieve Element"
hc.ma.anno[which(hc.ma.anno == "Phloem [SUC2]")]="Metaphloem & Companion Cell"
hc.ma.anno[which(hc.ma.anno == "QC")]="Quiescent Center"
hc.rna.anno[which(hc.rna.anno == "columella")]="Columella"
hc.rna.anno[which(hc.rna.anno == "developing cortex")]="Cortex"
hc.rna.anno[which(hc.rna.anno == "developing xylem")]="Xylem"
hc.rna.anno[which(hc.rna.anno == "endodermis & QC cells")]="Endodermis"
hc.rna.anno[which(hc.rna.anno == "hair cells")]="Trichoblast"
hc.rna.anno[which(hc.rna.anno == "LRC & non-hair cells")]="Lateral Root Cap"
hc.rna.anno[which(hc.rna.anno == "matured cortex")]="Cortex"
hc.rna.anno[which(hc.rna.anno == "matured endodermis")]="Endodermis"
hc.rna.anno[which(hc.rna.anno == "matured xylem pole")]="Xylem"
hc.rna.anno[which(hc.rna.anno == "non-hair cells")]="Atrichoblast"
hc.rna.anno[which(hc.rna.anno == "phloem & companion cells")]="Phloem"
hc.rna.anno[which(hc.rna.anno == "phloem pole pericycle")]="Phloem Pole Pericycle"
hc.rna.anno[which(hc.rna.anno == "protophloem & metaphloem")]="Phloem Sieve Element"
hc.rna.anno[which(hc.rna.anno == "QC cells")]="Quiescent Center"
hc.ici.anno[which(hc.ici.anno == "Mature_Cortex")]="Cortex"
hc.ici.anno[which(hc.ici.anno == "Phloem_Pole_Pericycle")]="Phloem Pole Pericycle"
hc.ici.anno[which(hc.ici.anno == "Maturing_Xylem")]="Xylem"
hc.ici.anno[which(hc.ici.anno == "Developing_Cortex")]="Cortex"
hc.ici.anno[which(hc.ici.anno == "Phloem_Companion_Cells")]="Metaphloem & Companion Cell"
hc.ici.anno[which(hc.ici.anno == "Meristematic_Xylem")]="Xylem"
hc.ici.anno[which(hc.ici.anno == "Quiescent_Center")]="Quiescent Center"
hc.ici.anno[which(hc.ici.anno == "Lateral_Root_Cap")]="Lateral Root Cap"
hc.ici.anno[which(hc.ici.anno == "Xylem_Pole_Pericycle")]="Xylem Pole Pericycle"
hc.ici.anno[which(hc.ici.anno == "Phloem_Companion_Cells")]="Metaphloem & Companion Cell"
# Merge annotations and metadata
cb.anno <- data.frame(hc.rna.anno=hc.rna.anno,hc.ma.anno=hc.ma.anno,hc.ici.anno=hc.ici.anno,hc.km.anno=hc.km.anno,hc.sp.anno=hc.sp.anno,hc.rcsp.anno=hc.rcsp.anno,hc.ms.anno=hc.ms.anno)
num.unique <- apply(cb.anno,1,function(x){length(unique(x[which(x!="NA")]))})
num.not.NA <- apply(cb.anno,1,function(x){length(which(x!="NA"))})
ici.not.NA <- apply(cb.anno,1,function(x){length(which(x[3]!="NA"))})
km.not.NA <- apply(cb.anno,1,function(x){length(which(x[4]!="NA"))})
ms.not.NA <- apply(cb.anno,1,function(x){length(which(x[7]!="NA"))})
cb.anno$num.unique <- num.unique
cb.anno$num.not.NA <- num.not.NA
cb.anno$ici.not.NA <- ici.not.NA
cb.anno$km.not.NA <- km.not.NA
cb.anno$ms.not.NA <- ms.not.NA
cb.anno$Cell_ID <- colnames(rc.integrated)
cb.anno$order <- seq(1,ncol(rc.integrated))
cb.anno.ground$annotation <- apply(cb.anno.ground,1,function(x){if (x[5]=="NA"){"Unknown"}else if(x[5]=="Endodermis"){"Endodermis"}else if(sort(table(as.character(x[1:5])), decreasing=TRUE)[1] > 1) {names(sort(table(as.character(x[1:5])), decreasing=TRUE))[1]}else{"Unknown"}})
cb.anno.ground$annotation[which(cb.anno.ground$annotation!="Cortex" & cb.anno.ground$annotation!="Endodermis")] = "Unknown"
cb.anno.rootcap$annotation <- apply(cb.anno.rootcap,1,function(x){if(sort(table(as.character(x[1:5])), decreasing=TRUE)[1] > 1) {names(sort(table(as.character(x[1:5])), decreasing=TRUE))[1]}else{"Unknown"}})
cb.anno.rootcap$annotation[which(cb.anno.rootcap$annotation=="Atrichoblast" | cb.anno.rootcap$annotation=="Trichoblast")] = "Lateral Root Cap"
cb.anno.rootcap$annotation[which(cb.anno.rootcap$annotation!="Columella" & cb.anno.rootcap$annotation!="Lateral Root Cap")] = "Unknown"
cb.anno.rootcap.sub <- cb.anno.rootcap[which(cb.anno.rootcap$hc.sp.anno==cb.anno.rootcap$annotation),]
cb.anno.rootcap.sub.2 <- cb.anno.rootcap.sub
cb.anno.rootcap.sub.2$annotation <- cb.anno.rootcap.sub.2$hc.rcsp.anno
# Voting, a cell needs at least two supports from different annotation methods in order to remain annotated. Marker-based annotation is given higher weight on final annotation decision
cb.anno.sub <- cb.anno %>% filter((num.not.NA >= 2 & num.unique <=2 & ici.not.NA==1) | (num.not.NA >= 2 & num.unique <=2 & km.not.NA==1)) %>% filter(hc.ms.anno != "Ground Tissue") %>% filter(hc.km.anno != "Root Cap") %>% filter(hc.km.anno != "Columella")
cb.anno.sub$annotation <- apply(cb.anno.sub,1,function(x){if(x[4]=="Procambium"){"Procambium"}else if(x[4]=="Atrichoblast"){"Atrichoblast"}else if(x[4]=="Metaphloem & Companion Cell" & x[7]=="Metaphloem & Companion Cell"){"Metaphloem & Companion Cell"}else if(x[4]=="Xylem Pole Pericycle"){"Xylem Pole Pericycle"}else if(x[4]=="Phloem Pole Pericycle"){"Phloem Pole Pericycle"}else if(x[4]=="Phloem Sieve Element" & x[7]=="Phloem Sieve Element"){"Phloem Sieve Element"}else if(x[3]==x[7] & x[1] == x[2] & x[1]==x[3]){x[7]} else if (x[3]==x[7] & x[3]==x[1]){x[7]} else if (x[3]==x[7] & x[3]==x[2]){x[7]} else if (x[1]==x[2] & x[1]==x[7]){x[7]} else if (x[1]==x[2] & x[1]==x[3]){x[3]}else if(x[3]=="Atrichoblast"){"Atrichoblast"} else {x[7]}})
for (i in which(cb.anno.sub$annotation == "Phloem Pole Pericycle")) {
if (cb.anno.sub$hc.km.anno[i]=="Xylem Pole Pericycle") {
cb.anno.sub$annotation[i] = "NA"
}
}
for (i in which(cb.anno.sub$annotation == "Root Cap")) {
cb.anno.sub$annotation[i] = "NA"
}
cb.anno.sub <- rbind(cb.anno.sub, cb.anno.ground, cb.anno.rootcap.sub)
cb.anno.sub$annotation[which(cb.anno.sub$annotation == "NA")] = "Unknown"
cb.anno.sub$annotation[which(cb.anno.sub$annotation == "Quiescent Center")] = "Unknown"
cb.anno.sub$annotation[which(cb.anno.sub$annotation == "Phloem Sieve Element")] = "Protophloem"
cb.anno.sub$annotation[which(cb.anno.sub$annotation == "Xylem")] <- as.character(rc.integrated$score.anno[as.numeric(cb.anno.sub$order)[which(cb.anno.sub$annotation=="Xylem")]])
rc.integrated$cb.anno <- rep("Unknown", ncol(rc.integrated))
rc.integrated$cb.anno[as.numeric(cb.anno.sub$order)] <- as.character(cb.anno.sub$annotation)
# Annotation on QC
cc.z.score <- apply(rc.integrated@assays$SCT@data[cc.genes,], 1, zscore)
qc.z.score <- apply(rc.integrated@assays$SCT@data[known.good.markers[which(known.good.markers$Celltype=="Quiescent Center"),]$Locus,], 1, zscore)
cczs <- apply(cc.z.score[which(rc.integrated$escore.crude.anno=="Quiescent Center"),], 1, mean)
qczs <- apply(qc.z.score[which(rc.integrated$escore.crude.anno=="Quiescent Center"),], 1, mean)
cczs.all <- apply(cc.z.score, 1, mean)
qczs.all <- apply(qc.z.score, 1, mean)
QC.idx <- match(names(which(cczs <= mean(cczs.all[which(qczs.all >= 0)])+1.5*IQR(cczs.all[which(qczs.all >= 0)]) & qczs >=0)), colnames(rc.integrated))
rc.integrated$cb.anno[QC.idx] <- "Quiescent Center"
# Plot initial consensus annotation
order <- c("Quiescent Center", "Ground Tissue","Columella", "Lateral Root Cap", "Atrichoblast", "Trichoblast", "Cortex", "Endodermis", "Metaphloem & Companion Cell","Protophloem", "Xylem", "Procambium","Xylem Pole Pericycle","Phloem Pole Pericycle", "Protoxylem", "Metaxylem", "Unknown")
palette <- c("#9400d3", "#DCD0FF","#5ab953", "#bfef45", "#008080", "#21B6A8", "#82b6ff", "#0000FF","#e6194b", "#dd77ec", "#9a6324", "#ffe119", "#ff9900", "#ffd4e3", "#9a6324", "#ddaa6f", "#EEEEEE")
rc.integrated$cb.anno <- factor(rc.integrated$cb.anno, levels = order[sort(match(unique(rc.integrated$cb.anno),order))])
color <- palette[sort(match(unique(rc.integrated$cb.anno),order))]
options(repr.plot.width=10, repr.plot.height=8)
DimPlot(rc.integrated, reduction = "umap", group.by = "cb.anno", cols = color)+ggtitle("consensus annotation")
# Find clusters, here we choose SLM clustering algorithm with resolution 0.5. Parameter "algorithm": 1 = original Louvain algorithm; 2 = Louvain algorithm with multilevel refinement; 3 = SLM algorithm; 4 = Leiden algorithm
suppressMessages(suppressWarnings(
rc.integrated <- FindClusters(rc.integrated, resolution = 0.5, algorithm = 3)
))
options(repr.plot.width=10, repr.plot.height=8)
DimPlot(rc.integrated, reduction = "umap", label=TRUE)+NoLegend()
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck Number of nodes: 110427 Number of edges: 3393008 Running smart local moving algorithm... Maximum modularity in 10 random starts: 0.9584 Number of communities: 29 Elapsed time: 78 seconds
# Only denoise clusters that contain one dominant labels
for (i in c("2","4","7","10","16","17","18","20","21","22","24","25","26","27","28")){
rc.integrated$cb.anno[which(rc.integrated$seurat_clusters==i)] <- names(which.max(table(rc.integrated$cb.anno[which(rc.integrated$seurat_clusters==i)][-which(rc.integrated$cb.anno[which(rc.integrated$seurat_clusters==i)]=="Unknown")])))
}
# Plot final consensus annotation
order <- c("Quiescent Center", "Ground Tissue","Columella", "Lateral Root Cap", "Atrichoblast", "Trichoblast", "Cortex", "Endodermis", "Metaphloem & Companion Cell","Protophloem", "Xylem", "Procambium","Xylem Pole Pericycle","Phloem Pole Pericycle", "Protoxylem", "Metaxylem", "Unknown")
palette <- c("#9400d3", "#DCD0FF","#5ab953", "#bfef45", "#008080", "#21B6A8", "#82b6ff", "#0000FF","#e6194b", "#dd77ec", "#9a6324", "#ffe119", "#ff9900", "#ffd4e3", "#9a6324", "#ddaa6f", "#EEEEEE")
rc.integrated$cb.anno <- factor(rc.integrated$cb.anno, levels = order[sort(match(unique(rc.integrated$cb.anno),order))])
color <- palette[sort(match(unique(rc.integrated$cb.anno),order))]
options(repr.plot.width=10, repr.plot.height=8)
DimPlot(rc.integrated, reduction = "umap", group.by = "cb.anno", cols = color)+ggtitle("curated by clusters")
rc.integrated$cb.anno <- as.character(rc.integrated$cb.anno)
rc.integrated$cb.anno[as.numeric(cb.anno.rootcap.sub.2$order)] <- as.character(cb.anno.rootcap.sub.2$annotation)
cb.anno$annotation <- rc.integrated$cb.anno
cb.anno$annotation[which(cb.anno$annotation == "Columella")] = "Unknown"
cb.anno$annotation[which(cb.anno$annotation == "Columella_1")] = "Unknown"
cb.anno$annotation[which(cb.anno$annotation == "EPI_LRC_1")] = "Unknown"
cb.anno$annotation[which(cb.anno$annotation == "Quiescent Center")] = "Unknown"
table(cb.anno$annotation)
Atrichoblast Columella_2
12173 114
Columella_3 Columella_4
113 25
Columella_5 Columella_6
66 364
Cortex Endodermis
10171 9217
LRC_2 LRC_3
524 545
LRC_4 LRC_5
2527 1106
Metaphloem & Companion Cell Metaxylem
1463 579
Phloem Pole Pericycle Procambium
2182 8997
Protophloem Protoxylem
430 3034
Trichoblast Unknown
11167 39020
Xylem Pole Pericycle
6610
# Merge cell type consensus annotation with time zone correlation-based annotation
long.merge <- as.character(rc.integrated$Long.ID.P)
cb.anno.t <- data.frame(rna=as.character(rc.integrated$timezone.ID.P), ma=long.merge)
cb.anno.t$order <- seq(1,ncol(rc.integrated))
cb.anno.t$annotation <- apply(cb.anno.t,1,function(x){if(TRUE){x[2]}else{"Unknown"}})
cb.anno.t.sub <- cb.anno.t %>% filter(annotation!="Unknown")
cb.anno.m <- left_join(cb.anno,cb.anno.t.sub,by="order")
cb.anno.m <- cb.anno.m %>% filter(annotation.x != "NA" & annotation.x != "Unknown" & annotation.y != "NA")
# Disect each cell type based on microarray time zone correlation-based annotation to have more grouping of cells/ to give finest resolution
cb.anno.m$final.annotation <- paste(cb.anno.m$annotation.y, cb.anno.m$annotation.x, sep="-")
# Filter grouping that has too few cells
cb.anno.m$final.annotation[grep(paste(names(table(cb.anno.m$final.annotation)[which(table(cb.anno.m$final.annotation) <= 2)]),collapse = "|"),cb.anno.m$final.annotation)] = "Unknown"
cb.anno.m <- cb.anno.m %>% filter(final.annotation != "Unknown")
length(sort(table(cb.anno.m$final.annotation)))
cb.anno.m$final.annotation[grep("Columella_2$",cb.anno.m$final.annotation)] = "Columella_2"
cb.anno.m$final.annotation[grep("Columella_3$",cb.anno.m$final.annotation)] = "Columella_3"
cb.anno.m$final.annotation[grep("Columella_4$",cb.anno.m$final.annotation)] = "Columella_4"
cb.anno.m$final.annotation[grep("Columella_5$",cb.anno.m$final.annotation)] = "Columella_5"
cb.anno.m$final.annotation[grep("Columella_6$",cb.anno.m$final.annotation)] = "Columella_6"
cb.anno.m$final.annotation[grep("^Elong-[0-9]-LRC_[2-3]$",cb.anno.m$final.annotation)] = "Unknown"
cb.anno.m$final.annotation[grep("^Mat-[0-9][0-9]-LRC_[2-3]$",cb.anno.m$final.annotation)] = "Unknown"
cb.anno.m$final.annotation[grep("^Meri-[0-9]-LRC_[4-5]$",cb.anno.m$final.annotation)] = "Unknown"
cb.anno.m$final.annotation[grep("^Columella-LRC_[4-5]$",cb.anno.m$final.annotation)] = "Unknown"
cb.anno.m$final.annotation[grep("LRC_4$",cb.anno.m$final.annotation)] = "LRC_4"
cb.anno.m$final.annotation[grep("LRC_5$",cb.anno.m$final.annotation)] = "LRC_5"
cb.anno.m <- cb.anno.m %>% filter(final.annotation != "Unknown")
rc.integrated$cb.anno <- rep("Unknown", ncol(rc.integrated))
rc.integrated$cb.anno[as.numeric(cb.anno.m$order)] <- as.character(cb.anno.m$final.annotation)
# 106 new reference expression profiles are going to be build
length(sort(table(rc.integrated$cb.anno[which(rc.integrated$cb.anno!="Unknown")])))
sort(table(rc.integrated$cb.anno[which(rc.integrated$cb.anno!="Unknown")]))
Mat-12-Cortex Meri-1-Protoxylem
3 3
Meri-4-LRC_3 Meri-5-Protophloem
3 3
Meri-6-Endodermis Meri-6-Metaxylem
3 3
Meri-3-Metaphloem & Companion Cell Meri-6-LRC_2
4 4
Mat-11-Protophloem Meri-4-Endodermis
5 5
Meri-4-Metaphloem & Companion Cell Elong-8-Atrichoblast
5 6
Mat-10-Protophloem Mat-9-Atrichoblast
6 6
Meri-4-Metaxylem Meri-4-Trichoblast
6 7
Meri-6-LRC_3 Meri-2-LRC_2
7 8
Meri-2-Xylem Pole Pericycle Meri-4-Atrichoblast
8 8
Meri-2-Endodermis Meri-2-LRC_3
10 10
Meri-3-Procambium Meri-3-Protoxylem
11 13
Mat-11-Phloem Pole Pericycle Mat-11-Protoxylem
14 17
Meri-1-LRC_3 Mat-9-Cortex
19 20
Meri-5-Protoxylem Columella_4
21 22
Meri-6-Protoxylem Meri-1-LRC_2
24 28
Mat-11-Endodermis Meri-2-Cortex
30 31
Meri-6-Protophloem Elong-8-Trichoblast
35 37
Meri-5-Phloem Pole Pericycle Meri-6-Xylem Pole Pericycle
38 38
Mat-10-Metaphloem & Companion Cell Mat-10-Phloem Pole Pericycle
46 50
Meri-3-Endodermis Meri-5-Metaxylem
51 51
Mat-11-Metaphloem & Companion Cell Meri-3-Metaxylem
53 54
Columella_5 Elong-8-Cortex
59 62
Meri-5-Metaphloem & Companion Cell Meri-6-Phloem Pole Pericycle
73 74
Meri-5-Procambium Meri-6-Metaphloem & Companion Cell
84 91
Mat-10-Xylem Pole Pericycle Elong-7-Metaxylem
101 103
Mat-11-Procambium Meri-5-Endodermis
104 104
Meri-5-LRC_3 Meri-6-Procambium
104 106
Columella_3 Columella_2
111 112
Meri-5-LRC_2 Elong-8-Protophloem
116 120
Meri-2-Atrichoblast Meri-3-Cortex
131 137
Mat-11-Xylem Pole Pericycle Meri-3-LRC_2
141 170
Meri-3-LRC_3 Elong-7-Metaphloem & Companion Cell
179 234
Elong-7-Protophloem Meri-5-Cortex
260 306
Mat-11-Cortex Elong-7-Phloem Pole Pericycle
320 321
Mat-11-Trichoblast Columella_6
327 360
Elong-8-Metaxylem Meri-5-Xylem Pole Pericycle
362 365
Mat-9-Endodermis Mat-10-Protoxylem
453 526
Mat-11-Atrichoblast Mat-10-Procambium
543 554
Meri-3-Trichoblast Meri-6-Trichoblast
613 647
Elong-7-Xylem Pole Pericycle Meri-3-Xylem Pole Pericycle
658 793
Elong-8-Metaphloem & Companion Cell Mat-9-Trichoblast
956 957
LRC_5 LRC_4
960 1021
Meri-3-Atrichoblast Elong-8-Protoxylem
1106 1149
Meri-6-Atrichoblast Elong-7-Protoxylem
1254 1278
Elong-7-Procambium Meri-6-Cortex
1561 1634
Elong-8-Phloem Pole Pericycle Meri-5-Atrichoblast
1684 1708
Meri-5-Trichoblast Mat-10-Cortex
1922 2035
Mat-10-Endodermis Mat-10-Trichoblast
2527 2561
Elong-7-Endodermis Mat-10-Atrichoblast
2743 2879
Elong-8-Endodermis Elong-7-Trichoblast
3291 4095
Elong-8-Xylem Pole Pericycle Elong-7-Atrichoblast
4504 4532
Elong-7-Cortex Elong-8-Procambium
5621 6573
# Extract integrated (batch-corrected) expression matrix
afm <- rc.integrated@assays$integrated@data
# Pool (average) expression values of each grouping
new_ref <- matrix(nrow=nrow(afm), ncol = 0)
for (i in 1:106) {
m <- afm[,which(rc.integrated$cb.anno==as.character(unique(rc.integrated$cb.anno[which(rc.integrated$cb.anno!="Unknown")]))[i])]
new_ref <- cbind(new_ref, rowSums(m)/ncol(m))
}
colnames(new_ref) <- as.character(unique(rc.integrated$cb.anno[which(rc.integrated$cb.anno!="Unknown")]))
gene.var <- apply(new_ref,1,var)
# Select top 200 highly variable genes
new_ref_sub <- new_ref[names(sort(gene.var,decreasing = TRUE)[1:200]),]
# Merge newly-built reference with atlas
merge.rownames <- function (x,y){
dat <- merge(x = x, y = y, by = "row.names")
rownames(dat) <- dat$Row.names
dat <- dat[,-1]
return(dat)
}
nr <- Reduce(merge.rownames, list(new_ref_sub,afm))
nr <- as.matrix(nr)
colnames(nr)[1:107]
# Correlation-based annotation using newly-built references
nr_label=colnames(new_ref)
nr_stat <- suppressWarnings(sapply(107:ncol(nr), function(i) sapply(1:106, function(j) cor.test(nr[,i],nr[,j],method = "pearson")[c(3,4)])))
nr_cor <- nr_stat[seq(2,nrow(nr_stat),2),]
nr_pvalue <- nr_stat[seq(1,nrow(nr_stat)-1,2),]
nr_max <- sapply(1:(ncol(nr)-106), function(i) max(as.numeric(nr_cor[,i])))
nr_ident <- sapply(1:(ncol(nr)-106), function(i) nr_label[which(as.numeric(nr_cor[,i])==max(as.numeric(nr_cor[,i])))])
nr_maxp <- sapply(1:(ncol(nr)-106), function(i) as.numeric(nr_pvalue[,i])[which(as.numeric(nr_cor[,i])==max(as.numeric(nr_cor[,i])))])
names(nr_max) <- nr_ident
rc.integrated$final.anno.P <- as.character(nr_ident)
rc.integrated$final.anno.cor.P <- as.numeric(nr_max)
rc.integrated$final.anno.pvalue.P <- as.numeric(nr_maxp)
rc.integrated$celltype.anno <- gsub("LRC","Lateral Root Cap",gsub("_[0-9]$","",gsub("^Meri-.*-","",gsub("^Elong-.*-","",gsub("^Mat-.*-","",as.character(rc.integrated$final.anno.P))))))
# Annotation on QC
zscore <- function(x){(x-mean(x))/sd(x)}
cc.z.score <- apply(rc.integrated@assays$SCT@data[cc.genes,], 1, zscore)
qc.z.score <- apply(rc.integrated@assays$SCT@data[known.good.markers[which(known.good.markers$Celltype=="Quiescent Center"),]$Locus,], 1, zscore)
cczs <- apply(cc.z.score[which(rc.integrated$escore.crude.anno=="Quiescent Center"),], 1, mean)
qczs <- apply(qc.z.score[which(rc.integrated$escore.crude.anno=="Quiescent Center"),], 1, mean)
cczs.all <- apply(cc.z.score, 1, mean)
qczs.all <- apply(qc.z.score, 1, mean)
QC.idx <- match(names(which(cczs <= mean(cczs.all[which(qczs.all >= 0)])+1.5*IQR(cczs.all[which(qczs.all >= 0)]) & qczs >=0)), colnames(rc.integrated))
rc.integrated$celltype.anno[QC.idx] <- "Quiescent Center"
# Only denoise clusters that contain one dominant labels
for (i in c("2","4","7","10","16","17","18","20","21","22","24","25","26","27","28")){
rc.integrated$celltype.anno[which(rc.integrated$seurat_clusters==i)] <- names(which.max(table(rc.integrated$celltype.anno[which(rc.integrated$seurat_clusters==i)])))
}
# Plot new correlation-based annotation (final annotation)
order <- c("Quiescent Center", "Ground Tissue","Columella", "Lateral Root Cap", "Atrichoblast", "Trichoblast", "Cortex", "Endodermis", "Metaphloem & Companion Cell","Protophloem", "Xylem", "Procambium","Xylem Pole Pericycle","Phloem Pole Pericycle", "Protoxylem", "Metaxylem", "Unknown")
palette <- c("#9400d3", "#DCD0FF","#5ab953", "#bfef45", "#008080", "#21B6A8", "#82b6ff", "#0000FF","#e6194b", "#dd77ec", "#9a6324", "#ffe119", "#ff9900", "#ffd4e3", "#9a6324", "#ddaa6f", "#EEEEEE")
rc.integrated$celltype.anno <- factor(rc.integrated$celltype.anno, levels = order[sort(match(unique(rc.integrated$celltype.anno),order))])
color <- palette[sort(match(unique(rc.integrated$celltype.anno),order))]
options(repr.plot.width=12, repr.plot.height=10)
DimPlot(rc.integrated, reduction = "umap", group.by = "celltype.anno", cols = color)+ggtitle("celltype.anno")
table(rc.integrated$celltype.anno)
Quiescent Center Columella
158 8535
Lateral Root Cap Atrichoblast
18396 13380
Trichoblast Cortex
12361 11073
Endodermis Metaphloem & Companion Cell
11369 3224
Protophloem Procambium
939 9307
Xylem Pole Pericycle Phloem Pole Pericycle
12213 4634
Protoxylem Metaxylem
3195 1643
# Plot QC
temp <- rep("NA", ncol(rc.integrated))
temp[QC.idx]="Quiescent Center"
rc.integrated$temp <- temp
options(repr.plot.width=10, repr.plot.height=8)
DimPlot(rc.integrated, reduction = "umap", group.by = "temp", order = c("Quiescent Center","NA"),cols=c("#cccccc", "#ff4040"), pt.size=1.5)
table(rc.integrated$orig.ident[QC.idx])
col0 dc1 dc2 pp1 sc_1 sc_10_at sc_11 sc_12
11 19 22 8 3 12 24 6
sc_31 sc_40 sc_51 sc_9_at tnw1 tnw2
10 2 12 9 9 11
# Plot correlation coefficient
rc.integrated$final.anno.P_significance <- rep(">= 0.6", nrow(rc.integrated@meta.data))
rc.integrated$final.anno.P_significance[which(rc.integrated$final.anno.cor.P < 0.6)] = "< 0.6"
FeaturePlot(rc.integrated, reduction = "umap", features = "final.anno.cor.P")
DimPlot(rc.integrated, reduction = "umap", group.by = "final.anno.P_significance", order = c(">= 0.6","< 0.6"),cols = c("#cccccc", "#008081"))
temp <- rep("NA", ncol(rc.integrated))
temp[grep("^Meri",rc.integrated$final.anno.P)]="Meristem"
temp[grep("^Elong",rc.integrated$final.anno.P)]="Elongation"
temp[grep("^Mat",rc.integrated$final.anno.P)]="Maturation"
temp[grep("^Columella",rc.integrated$final.anno.P)]="Maturation"
temp[grep("LRC_2$",rc.integrated$final.anno.P)]="LRC_2"
temp[grep("LRC_3$",rc.integrated$final.anno.P)]="LRC_3"
temp[grep("LRC_4$",rc.integrated$final.anno.P)]="LRC_4"
temp[grep("LRC_5$",rc.integrated$final.anno.P)]="LRC_5"
temp[grep("Columella_6$",rc.integrated$final.anno.P)]="Columella_6"
temp[grep("Columella_5$",rc.integrated$final.anno.P)]="Columella_5"
temp[grep("Columella_3$",rc.integrated$final.anno.P)]="Columella_3"
temp[grep("Columella_2$",rc.integrated$final.anno.P)]="Columella_2"
temp[grep("Columella_4$",rc.integrated$final.anno.P)]="Columella_4"
#temp[QC.idx]="Meristem"
rc.integrated$time.anno.fine <- temp
options(repr.plot.width=10, repr.plot.height=8)
DimPlot(rc.integrated, reduction = "umap", group.by = "time.anno.fine",order = c("Columella_6","Columella_5","Columella_4","Columella_3","Columella_2","LRC_5","LRC_4","LRC_3","LRC_2","Maturation","Elongation","Meristem"),cols = c("#e5f5e0", "#a1d99b", "#31a354", "#eff3ff", "#bdd7e7", "#6baed6", "#2171b5", '#FDEB73','#F6C15B','#ED9445','#E66731','#B84A29'))
# Merge layers of root caps
temp <- rep("NA", ncol(rc.integrated))
temp[grep("^Meri",rc.integrated$final.anno.P)]="Meristem"
temp[grep("^Elong",rc.integrated$final.anno.P)]="Elongation"
temp[grep("^Mat",rc.integrated$final.anno.P)]="Maturation"
temp[grep("^Columella",rc.integrated$final.anno.P)]="Maturation"
temp[grep("LRC_2$",rc.integrated$final.anno.P)]="Proximal Lateral Root Cap"
temp[grep("LRC_3$",rc.integrated$final.anno.P)]="Proximal Lateral Root Cap"
temp[grep("LRC_4$",rc.integrated$final.anno.P)]="Distal Lateral Root Cap"
temp[grep("LRC_5$",rc.integrated$final.anno.P)]="Distal Lateral Root Cap"
temp[grep("Columella_6$",rc.integrated$final.anno.P)]="Distal Columella"
temp[grep("Columella_5$",rc.integrated$final.anno.P)]="Distal Columella"
temp[grep("Columella_4$",rc.integrated$final.anno.P)]="Proximal Columella"
temp[grep("Columella_3$",rc.integrated$final.anno.P)]="Proximal Columella"
temp[grep("Columella_2$",rc.integrated$final.anno.P)]="Proximal Columella"
rc.integrated$time.anno <- temp
options(repr.plot.width=10, repr.plot.height=8)
DimPlot(rc.integrated, reduction = "umap", group.by = "time.anno",order = c("Distal Columella","Proximal Columella","Distal Lateral Root Cap","Proximal Lateral Root Cap","Maturation","Elongation","Meristem"),cols = c("#e5f5e0", "#a1d99b", "#31a354", "#deebf7", "#3182bd", '#fee0d2','#de2d26'))
rc.integrated$celltype.anno.crude <- gsub("Protoxylem","Xylem",gsub("Metaxylem","Xylem",gsub("Xylem Pole Pericycle","Pericycle",gsub("Phloem Pole Pericycle","Pericycle",gsub("Metaphloem & Companion Cell","Phloem",gsub("Protophloem", "Phloem",rc.integrated$celltype.anno))))))
# Plot merged sub-types
order <- c("Quiescent Center", "Columella", "Lateral Root Cap", "Atrichoblast", "Trichoblast", "Cortex", "Endodermis", "Pericycle", "Phloem", "Xylem", "Procambium", "Unknown")
palette <- c("#9400d3", "#5ab953", "#bfef45", "#008080", "#21B6A8", "#82b6ff", "#0000FF","#ff9900","#e6194b", "#9a6324", "#ffe119","#EEEEEE")
rc.integrated$celltype.anno.crude <- factor(rc.integrated$celltype.anno.crude, levels = order[sort(match(unique(rc.integrated$celltype.anno.crude),order))])
color <- palette[sort(match(unique(rc.integrated$celltype.anno.crude),order))]
options(repr.plot.width=12, repr.plot.height=10)
DimPlot(rc.integrated, reduction = "umap", group.by = "celltype.anno.crude", cols = color)
rc.integrated$time.celltype.anno.crude <- paste(rc.integrated$time.anno,rc.integrated$celltype.anno.crude,sep="_")
rc.integrated$time.celltype.anno.crude <- gsub("Distal Columella_Columella","Distal Columella",rc.integrated$time.celltype.anno.crude)
rc.integrated$time.celltype.anno.crude <- gsub("Proximal Columella_Columella","Proximal Columella",rc.integrated$time.celltype.anno.crude)
rc.integrated$time.celltype.anno.crude <- gsub("Distal Lateral Root Cap_Lateral Root Cap","Distal Lateral Root Cap",rc.integrated$time.celltype.anno.crude)
rc.integrated$time.celltype.anno.crude <- gsub("Proximal Lateral Root Cap_Lateral Root Cap","Proximal Lateral Root Cap",rc.integrated$time.celltype.anno.crude)
rc.integrated$time.anno[grep("^Proximal Columella_",rc.integrated$time.celltype.anno.crude)] <- "Meristem"
rc.integrated$time.anno[grep("^Proximal Lateral Root Cap_",rc.integrated$time.celltype.anno.crude)] <- "Meristem"
rc.integrated$time.anno[grep("^Distal Columella_",rc.integrated$time.celltype.anno.crude)] <- "Maturation"
rc.integrated$time.anno[grep("^Distal Lateral Root Cap_",rc.integrated$time.celltype.anno.crude)] <- "Maturation"
options(repr.plot.width=10, repr.plot.height=8)
DimPlot(rc.integrated, reduction = "umap", group.by = "time.anno",order = c("Distal Columella","Proximal Columella","Distal Lateral Root Cap","Proximal Lateral Root Cap","Maturation","Elongation","Meristem"),cols = c("#e5f5e0", "#a1d99b", "#31a354", "#deebf7", "#3182bd", '#fee0d2','#de2d26'))
rc.integrated$celltype.anno.crude <- gsub("Protoxylem","Xylem",gsub("Metaxylem","Xylem",gsub("Xylem Pole Pericycle","Pericycle",gsub("Phloem Pole Pericycle","Pericycle",gsub("Metaphloem & Companion Cell","Phloem",gsub("Protophloem", "Phloem",rc.integrated$celltype.anno))))))
order <- c("Quiescent Center", "Columella", "Lateral Root Cap", "Atrichoblast", "Trichoblast", "Cortex", "Endodermis", "Pericycle", "Phloem", "Xylem", "Procambium", "Unknown")
palette <- c("#9400d3", "#5ab953", "#bfef45", "#008080", "#21B6A8", "#82b6ff", "#0000FF","#ff9900","#e6194b", "#9a6324", "#ffe119","#EEEEEE")
rc.integrated$celltype.anno.crude <- factor(rc.integrated$celltype.anno.crude, levels = order[sort(match(unique(rc.integrated$celltype.anno.crude),order))])
color <- palette[sort(match(unique(rc.integrated$celltype.anno.crude),order))]
options(repr.plot.width=12, repr.plot.height=10)
DimPlot(rc.integrated, reduction = "umap", group.by = "celltype.anno.crude", cols = color)
rc.integrated$time.celltype.anno.crude <- paste(rc.integrated$time.anno,rc.integrated$celltype.anno.crude,sep="_")
rc.integrated$time.celltype.anno.crude <- gsub("Distal Columella_Columella","Distal Columella",rc.integrated$time.celltype.anno.crude)
rc.integrated$time.celltype.anno.crude <- gsub("Proximal Columella_Columella","Proximal Columella",rc.integrated$time.celltype.anno.crude)
rc.integrated$time.celltype.anno.crude <- gsub("Distal Lateral Root Cap_Lateral Root Cap","Distal Lateral Root Cap",rc.integrated$time.celltype.anno.crude)
rc.integrated$time.celltype.anno.crude <- gsub("Proximal Lateral Root Cap_Lateral Root Cap","Proximal Lateral Root Cap",rc.integrated$time.celltype.anno.crude)
(table(rc.integrated$time.celltype.anno.crude))
Distal Columella Distal Lateral Root Cap Elongation_Atrichoblast
6771 5818 4326
Elongation_Cortex Elongation_Endodermis Elongation_Pericycle
4630 6500 10008
Elongation_Phloem Elongation_Procambium Elongation_Trichoblast
2560 6470 2655
Elongation_Xylem Maturation_Atrichoblast Maturation_Cortex
3076 3375 2179
Maturation_Endodermis Maturation_Pericycle Maturation_Phloem
3116 3676 758
Maturation_Procambium Maturation_Trichoblast Maturation_Xylem
2130 4334 1108
Meristem_Atrichoblast Meristem_Cortex Meristem_Endodermis
5679 4264 1753
Meristem_Pericycle Meristem_Phloem Meristem_Procambium
3163 845 707
Meristem_Quiescent Center Meristem_Trichoblast Meristem_Xylem
158 5372 654
Proximal Columella Proximal Lateral Root Cap
1764 12578
# Save Seurat object
saveRDS(rc.integrated, file='./Root_Atlas.rds')